%matplotlib inline
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
#url = 'https://twelvedata.com/markets/256458/crypto/huobi/eth-btc/historical-data?start_date=2025-09-01&end_date=2025-09-03&interval=30min' # Replace with your URL
#tables = pd.read_html(url)
#url1 = 'https://twelvedata.com/markets/256458/crypto/huobi/eth-btc/historical-data?start_date=2025-09-01&end_date=2025-09-03&interval=30min&page=2'
#tables1 = pd.read_html(url1)
##tbls = pd.concat([tables[0], tables1[0]])
#tbls.to_csv('huobi_eth_bts.csv')
#from google.colab import drive
#drive.mount('/content/gdrive', force_remount=True)
#ls gdrive/MyDrive/
#%cd /content/gdrive/MyDrive/Colab Notebooks
# Huobi price table (saved earlier as CSV), used later as an auxiliary series
# on the comparison chart.
tbls = pd.read_csv("huobi_eth_bts.csv").assign(
    # Parse the 'Date' column into timezone-aware (UTC) timestamps.
    timestamp=lambda frame: pd.to_datetime(frame['Date'].to_numpy(), utc=True)
)
tbls.head()
#%cd /home
#%pwd
#from google.colab import files
#uploaded = files.upload()
#for k in uploaded:
# print(k)
#import io
#trds = pd.read_csv(io.StringIO(uploaded['eth-btc-trades.csv'].decode('utf-8')))
#lob = pd.read_csv(io.StringIO(uploaded['eth-btc-orderbooks.csv'].decode('utf-8')))
# Load the raw trades and the limit-order-book (LOB) snapshots.
trds = pd.read_csv('eth-btc-trades.csv')
lob = pd.read_csv('eth-btc-orderbooks.csv')
trds.shape, lob.shape
lob.head()
# Sanity check: the distinct numbers of levels per snapshot on each side.
# Each bids/asks cell is the text of a Python literal (a sequence of dicts).
# SECURITY: it is parsed with ast.literal_eval instead of eval, so that a
# crafted CSV cell cannot execute arbitrary code; literal_eval accepts only
# plain literals (numbers, strings, lists, dicts, ...).
np.unique([len(ast.literal_eval(cell)) for cell in lob.bids]), np.unique([len(ast.literal_eval(cell)) for cell in lob.asks])
def fun(x, prefix='bids'):
    """Flatten one serialized book side into a single wide record.

    Parameters
    ----------
    x : str
        Text of a Python literal: a sequence of dicts, one dict per book
        level, ordered as stored in the CSV cell.
    prefix : str, default 'bids'
        Column-name prefix; 'ask' and 'bids' are the two values used here.

    Returns
    -------
    dict
        Maps ``'<prefix>_<key>_<level index>'`` to the corresponding value,
        for every key of every level.

    Notes
    -----
    SECURITY: the text is parsed with ``ast.literal_eval`` rather than
    ``eval`` so that a crafted CSV cell cannot execute arbitrary code.
    """
    return {
        prefix + '_' + key + '_' + str(level): value
        for level, entry in enumerate(ast.literal_eval(x))
        for key, value in entry.items()
    }


# One row per snapshot, one column per (field, level) pair.
df_asks = pd.DataFrame.from_records(lob.asks.apply(fun, args=('ask',)))
df_asks.head()
df_bids = pd.DataFrame.from_records(lob.bids.apply(fun, args=('bids',)))
df_bids.head()
# Side-by-side wide table of both book sides, stamped with the snapshot time.
df_bids_asks = pd.concat([df_bids, df_asks], axis=1)
df_bids_asks['timestamp'] = pd.to_datetime(lob.timestamp.values)
df_bids_asks.head()
def _melt_book_side(frame, prefix):
    """Stack the per-level price columns of one book side into long format.

    Parameters
    ----------
    frame : pandas.DataFrame
        Wide table holding a 'timestamp' column plus
        ``'<prefix>_price_<i>'`` / ``'<prefix>_size_<i>'`` column pairs.
    prefix : str
        Side prefix of the columns to stack ('bids' or 'ask' in this script).

    Returns
    -------
    pandas.DataFrame
        Columns 'timestamp', 'size', 'variable' (the original price column
        name) and 'value' (the price), with levels concatenated in
        increasing level order.
    """
    price_stub = prefix + '_price_'
    size_stub = prefix + '_size_'
    # Derive the depth from the columns actually present instead of assuming
    # a fixed 50 levels.
    n_levels = sum(str(col).startswith(price_stub) for col in frame.columns)
    per_level = []
    for level in range(n_levels):
        pr_name = price_stub + str(level)
        sz_name = size_stub + str(level)
        # Keep the level's size next to its price while the price column is
        # unpivoted into ('variable', 'value').
        long_level = frame[['timestamp', pr_name, sz_name]].melt(
            id_vars=['timestamp', sz_name], value_vars=pr_name
        )
        per_level.append(long_level.rename(columns={sz_name: 'size'}))
    return pd.concat(per_level)


df_melt_bids = _melt_book_side(df_bids_asks, 'bids')
df_melt_asks = _melt_book_side(df_bids_asks, 'ask')
df_melt_asks.head()
# Bids first, then asks, and finally ordered by snapshot time.
df_melt_bids_asks = pd.concat([df_melt_bids, df_melt_asks])
df_melt_bids_asks.shape
df_melt_bids_asks.sort_values(by='timestamp', inplace=True)
# Bring the trades into the same long layout as the melted book:
# price -> 'value', side -> 'variable', and a parsed 'timestamp'.
trds_m = trds.assign(
    timestamp=pd.to_datetime(trds['timestamp'].to_numpy())
).rename(columns={'price': 'value', 'side': 'variable'})
trds_m.head()
# Trades and book levels in one time-ordered table.
df_trds_bids_asks = pd.concat([trds_m, df_melt_bids_asks]).sort_values(by='timestamp')
# Log2 size relative to the smallest size in the table, shifted by 0.01.
_sizes = df_trds_bids_asks['size']
df_trds_bids_asks['size_log'] = np.log2(_sizes.values) - np.log2(np.min(_sizes)) + 0.01
df_trds_bids_asks.head()
# Re-shape the Huobi closes into the same long layout as the trade/book rows
# (timestamp, value, size, variable, size_log) and append them.
tbls = tbls.rename(columns={'Close': 'value'})
tbls.head()
tbls['timestamp'] = pd.to_datetime(tbls['Date'].values, utc=True)
# Take an explicit copy of the column subset: assigning new columns to a bare
# selection raises pandas' SettingWithCopyWarning and is not guaranteed to
# write to the object that is kept.
tbls = tbls[['timestamp', 'value']].copy()
tbls['size'] = 1.1  # the same constant size for every Huobi row
tbls.head()
tbls['variable'] = 'huobi_close'
tbls['size_log'] = np.log2(tbls['size'])
df_trds_bids_asks_huobi = pd.concat([df_trds_bids_asks, tbls])
df_trds_bids_asks_huobi.sort_values(by='timestamp', inplace=True)
# Number of trades on each side, shown as a bar chart.
trds.groupby('side')['size'].count().plot.bar(title='trades count disbalance by side')
plt.show()
# Mean trade size per side.
trds['size'].groupby(trds['side']).mean()
# Summary statistics of the BUY trades, then of the SELL trades.
trds.loc[trds['side'].eq('BUY')].describe()
trds.loc[trds['side'].eq('SELL')].describe()
# Total ask-side size per snapshot (summed over all levels): summary statistics.
df_asks.filter(like='size').sum(axis='columns').describe()
# Total bid-side size per snapshot (summed over all levels): summary statistics.
df_bids.filter(like='size').sum(axis='columns').describe()
# 1) Every book level over time, coloured by its level/side column name.
fig = px.scatter(
    df_melt_bids_asks,
    x="timestamp",
    y=["value"],
    color='variable',
    title='LOB data without trading',
)
fig.show()

# 2) Same view with marker size taken from the level size, restricted to the
#    rows located before the first occurrence of the largest size.
_largest_size_pos = np.argmax(df_melt_bids_asks['size'].values)
fig = px.scatter(
    df_melt_bids_asks.iloc[:_largest_size_pos],
    x="timestamp",
    y=["value"],
    color='variable',
    size='size',
    title='LOB data without trading where points are proportional to size(until extremely big ask but small after is the same)',
)
fig.show()

# 3) Best bid / best ask together with the executed trades.
_spread_and_trades = df_trds_bids_asks_huobi['variable'].isin(
    ['BUY', 'SELL', 'bids_price_0', 'ask_price_0']
)
fig = px.scatter(
    df_trds_bids_asks_huobi[_spread_and_trades],
    x="timestamp",
    y=["value"],
    color='variable',
    title='BID/ASK spread and trades',
)
fig.show()
We can see that the buy trading volume is larger than the sell volume by several orders of magnitude, and larger than the LOB volume (on both sides) as well. There is clearly no significant depth, in absolute ETH terms, in the LOB (less than 1 ETH on the ask side, except for one outlier, and less than 1.8 ETH on the bid side), and the LOB levels are extremely regular (especially the bids). This is what one would expect from something like wash trading (where the other side is absent, or perhaps the data is simply missing). I think we do not see the extremely large ask levels that would match the trades because the best asks are matched instantaneously with the corresponding non-bona-fide buy orders, and the data is sparse. However, there is one big ask order of about 199 ETH, which appears to be such a non-bona-fide ask order, because it has the same order of magnitude as the median. I think this behavior may pursue several goals. The first and main one (from my point of view) is to emulate high trading activity (volume) on the venue, for marketing purposes or to earn rebates. The second is to mislead market participants into believing that the market price will go up, even though there is no such volume on the ask side of the LOB (fake arbitrage opportunities).
It is interesting that theoretical arbitrage opportunities exist, for example against Huobi. But, as I have already said, there is no significant volume with which to realize them. This is another reason to regard this as manipulative (wash trading) behaviour. We can see this in the graph below. For that purpose I scraped 30-minute-interval OHLC data from Huobi (as the source describes it).
# Best bid / best ask and trades, overlaid with the Huobi close series.
_series_to_plot = df_trds_bids_asks_huobi['variable'].isin(
    ['huobi_close', 'BUY', 'SELL', 'bids_price_0', 'ask_price_0']
)
fig = px.scatter(
    df_trds_bids_asks_huobi[_series_to_plot],
    x="timestamp",
    y=["value"],
    color='variable',
    title='BID/ASK spread, trades and Huobi 30min close',
)
fig.show()